Evaluation of RISK survival models

This document highlights the use of

RRPlot for the evaluation, and CoxRiskCalibration (Cox models) or CalibrationProbPoissonRisk (logistic models) for the calibration of survival-data risk models.

Furthermore, it can be used to evaluate any risk index that returns the probability of a future event on an external data set.

This document will use the survival::rotterdam, and survival::gbsg data-sets to train and predict the risk of cancer recurrence after surgery. Both Cox and Logistic models will be trained and evaluated.

Here are some sample plots returned by the evaluated functions:

The libraries

# survival: provides Surv() and the rotterdam/gbsg data sets.
# FRESA.CAD: provides BSWiMS.model, RRPlot, CoxRiskCalibration, ppoisGzero, etc.
library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# Save graphics parameters so multi-panel plots can be reverted with par(op).
op <- par(no.readonly = TRUE)
# pander formatting: 3 significant digits, wide tables, keep trailing zeros.
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

Breast Cancer Royston-Altman data

data(gbsg, package="survival") and data(rotterdam, package="survival")

# Test cohort: GBSG with the patient id moved into the row names.
gbsgdata <- gbsg
rownames(gbsgdata) <- gbsgdata$pid
gbsgdata$pid <- NULL

# Training cohort: Rotterdam, with outcome columns renamed to match GBSG
# (rtime -> rfstime, recur -> status).
odata <-rotterdam
rownames(odata) <- odata$pid
odata$pid <- NULL
odata$rfstime <- odata$rtime
odata$status <- odata$recur
odata$rtime <- NULL
odata$recur <- NULL

# Keep only the columns shared with the GBSG test set.
odata <- odata[,colnames(odata) %in% colnames(gbsgdata)]

# Rotterdam codes tumor size as a factor; map each category to a representative
# size in mm so the column is numeric, as in GBSG.
odata$size <- 10*(odata$size=="<=20") + 
  35*(odata$size=="20-50") + 
  60*(odata$size==">50")

# Design matrix with all pairwise interactions (~.*.) for the survival outcome.
data <- as.data.frame(model.matrix(Surv(rfstime,status)~.*.,odata))

data$`(Intercept)` <- NULL

# Training frame: time and status first, then the interaction design matrix.
dataBrestCancerTrain <- cbind(time=odata[rownames(data),"rfstime"],status=odata[rownames(data),"status"],data)

# Sanitize column names: ":" from interactions and other punctuation are not
# valid inside model formulas.
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),":","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain)," ","")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),"\\.","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),"-","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),">","_")
dataBrestCancerTrain$time <- dataBrestCancerTrain$time/365 ## To years


# Event counts of the training (Rotterdam) cohort.
pander::pander(table(odata[rownames(data),"status"]),caption="rotterdam")
rotterdam
0 1
1464 1518

data(gbsg, package="survival") data conditioning

# Restrict GBSG to the columns used for training, then build the same
# interaction design matrix (~.*.) used on the Rotterdam data.
gbsgdata <- gbsgdata[,colnames(odata)]
data <- as.data.frame(model.matrix(Surv(rfstime,status)~.*.,gbsgdata))

data$`(Intercept)` <- NULL

# Test frame: time and status first, then the interaction design matrix.
dataBrestCancerTest <- cbind(time=gbsgdata[rownames(data),"rfstime"],status=gbsgdata[rownames(data),"status"],data)

# Sanitize column names exactly as done for the training set.
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),":","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest)," ","")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),"\\.","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),"-","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),">","_")
dataBrestCancerTest$time <- dataBrestCancerTest$time/365

# BUG FIX: the event table for the GBSG cohort must come from gbsgdata, not
# from the Rotterdam odata — GBSG row names index the wrong subjects in odata,
# so the previously printed counts were Rotterdam statuses, not GBSG's.
pander::pander(table(gbsgdata[rownames(data),"status"]), caption="gbsg")
gbsg
0 1
499 183

Cox Modeling

# Fit a BSWiMS Cox model on all sanitized features (1 loop, 5 repeats).
ml <- BSWiMS.model(Surv(time,status)~.,data=dataBrestCancerTrain,loops=1,NumberofRepeats = 5)

—–.

# Coefficient summary: hazard-ratio CIs plus accuracy/AUC/IDI/NRI statistics.
sm <- summary(ml)
pander::pander(sm$coefficients)
  Estimate lower HR upper u.Accuracy r.Accuracy full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI z.NRI Delta.AUC Frequency
age_nodes 0.000716 1.001 1.001 1.001 0.626 0.600 0.632 0.630 0.601 0.634 0.03040 0.4594 12.81 14.37 0.033056 1
size_grade 0.005649 1.005 1.006 1.006 0.598 0.623 0.632 0.599 0.626 0.634 0.01868 0.3914 9.82 11.29 0.007947 1
nodes 0.086582 1.082 1.090 1.099 0.637 0.642 0.643 0.640 0.643 0.644 0.00745 0.0564 8.33 1.66 0.000148 1
size 0.006888 1.005 1.007 1.009 0.595 0.641 0.643 0.595 0.642 0.644 0.01447 0.3587 8.05 9.97 0.001322 1
size_nodes -0.000378 1.000 1.000 1.000 0.624 0.643 0.643 0.629 0.644 0.644 0.00346 0.3430 7.25 9.57 -0.000377 1
age_size -0.000149 1.000 1.000 1.000 0.567 0.627 0.632 0.568 0.630 0.634 0.00635 0.1935 5.95 5.36 0.004078 1
grade 0.204934 1.146 1.227 1.314 0.565 0.637 0.643 0.561 0.638 0.644 0.00926 0.2069 5.88 6.31 0.005344 1
age -0.003113 0.996 0.997 0.998 0.513 0.628 0.643 0.513 0.628 0.644 0.00416 0.0917 5.27 2.51 0.015465 1
grade_nodes -0.013784 0.981 0.986 0.992 0.635 0.645 0.643 0.639 0.646 0.644 0.00207 -0.0910 5.03 -2.55 -0.002609 1

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events within 5 years

timeinterval <- 5 # Five years

# Crude baseline event rate: events observed within the interval, divided by
# subjects whose 5-year status is known (followed past 5 years or had an event).
h0 <- sum(dataBrestCancerTrain$status & dataBrestCancerTrain$time <= timeinterval)
h0 <- h0/sum((dataBrestCancerTrain$time > timeinterval) | (dataBrestCancerTrain$status==1))

pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.429 5
# Cox linear predictor converted to a 5-year event probability via the
# Poisson approximation with baseline hazard h0 (ppoisGzero).
index <- predict(ml,dataBrestCancerTrain)
rdata <- cbind(dataBrestCancerTrain$status,ppoisGzero(index,h0))

# Evaluate the raw (uncalibrated) model on the training set, thresholding
# at the 0.90 and 0.80 rates.
rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

By Risk Categories

# Observed vs expected event counts per risk category (Total/low/90%/80%).
obsexp <- rrAnalysisTrain$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 1518 1443 1596 1467 0.1829
low 816 761 874 851 0.2303
90% 248 218 281 210 0.0106
80% 454 413 498 402 0.0101
# Square log-log axes with a ~10% margin around the observed/expected range.
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     log="xy")

# 95% CI error bars per category; the dashed identity line marks perfect calibration.
errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2)
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)

The Time vs. Events are not calibrated. Lets do the calibration

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.459 0.389 0.320 0.214 0.18549 0.4996
RR 1.690 1.713 1.799 2.376 1.00000 1.7255
RR_LCI 1.586 1.603 1.666 1.869 0.00000 1.6196
RR_UCI 1.802 1.830 1.942 3.019 0.00000 1.8383
SEN 0.299 0.462 0.644 0.965 1.00000 0.2464
SPE 0.900 0.798 0.646 0.125 0.00137 0.9310
BACC 0.599 0.630 0.645 0.545 0.50068 0.5887
NetBenefit 0.110 0.172 0.246 0.374 0.39742 0.0916
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.03 0.984 1.09 0.183
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.14 1.14 1.13 1.15
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.31 1.31 1.31 1.32
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.676 0.676 0.662 0.69
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.694 0.675 0.713
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.299 0.276 0.323
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.459 0.389
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 465.079317 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1985 816 1144 93.9 385.7
class=1 396 248 177 28.0 31.8
class=2 601 454 197 336.3 391.3

Cox Calibration

# Reset the saved graphics state before the calibration plots.
op <- par(no.readonly = TRUE)


# Calibrate the Cox model on the training data: returns a calibrated baseline
# hazard (h0), a hazard gain, a time interval, and calibrated probabilities.
calprob <- CoxRiskCalibration(ml,dataBrestCancerTrain,"status","time")

( 7.169531 , 26976.42 , 1.07054 , 1518 , 1806.352 )

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.711 1.37 7.17

The RRplot() of the calibrated model

# Re-evaluate the training set with the calibrated probabilities and the
# calibrated time interval.
rcaldata <- cbind(dataBrestCancerTrain$status,calprob$prob)


rrAnalysisCalTrain <- RRPlot(rcaldata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Cal. Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

By Risk Categories

# Observed vs expected event counts per risk category after calibration.
obsexp <- rrAnalysisCalTrain$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 1518 1443 1596 1694 1.44e-05
low 816 761 874 983 4.09e-08
90% 248 218 281 243 7.24e-01
80% 454 413 498 464 6.76e-01
# Square log-log axes with a ~10% margin around the observed/expected range.
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Cal. Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     log="xy")

# 95% CI error bars per category; the dashed identity line marks perfect calibration.
errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2)
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)

Calibrated Train Performance

# BUG FIX: this section reports the CALIBRATED training performance, so it must
# render rrAnalysisCalTrain (computed above), not the uncalibrated rrAnalysisTrain.
# The tables below are stale output from the uncalibrated object; re-knit to refresh.
pander::pander(t(rrAnalysisCalTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.459 0.389 0.320 0.214 0.18549 0.4996
RR 1.690 1.713 1.799 2.376 1.00000 1.7255
RR_LCI 1.586 1.603 1.666 1.869 0.00000 1.6196
RR_UCI 1.802 1.830 1.942 3.019 0.00000 1.8383
SEN 0.299 0.462 0.644 0.965 1.00000 0.2464
SPE 0.900 0.798 0.646 0.125 0.00137 0.9310
BACC 0.599 0.630 0.645 0.545 0.50068 0.5887
NetBenefit 0.110 0.172 0.246 0.374 0.39742 0.0916
pander::pander(t(rrAnalysisCalTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.03 0.984 1.09 0.183
pander::pander(t(rrAnalysisCalTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.14 1.14 1.13 1.15
pander::pander(t(rrAnalysisCalTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.31 1.31 1.31 1.32
pander::pander(rrAnalysisCalTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.676 0.676 0.662 0.69
pander::pander(t(rrAnalysisCalTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.694 0.675 0.713
pander::pander((rrAnalysisCalTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.299 0.276 0.323
pander::pander((rrAnalysisCalTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisCalTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.459 0.389
pander::pander(rrAnalysisCalTrain$surdif,caption="Logrank test")
Logrank test Chisq = 465.079317 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1985 816 1144 93.9 385.7
class=1 396 248 177 28.0 31.8
class=2 601 454 197 336.3 391.3

Performance on the external data set

# Raw Cox prediction on the external GBSG cohort, first scored as a
# binary classifier of the status column.
index <- predict(ml,dataBrestCancerTest)
pp <- predictionStats_binary(cbind(dataBrestCancerTest$status,index),plotname="Breast Cancer")

par(op)


# Event probabilities using the TRAINING baseline hazard h0; risk categories use
# the probability thresholds learned on the training set (thr_atP), not re-fit here.
prob <- ppoisGzero(index,h0)
rdata <- cbind(dataBrestCancerTest$status,prob)
rrCoxTestAnalysis <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

External Data Report

pander::pander(t(rrCoxTestAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.459 @:0.389 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.4583 0.389 0.416 0.223 1.77e-01 0.4987
RR 1.7994 1.643 1.758 3.279 2.64e+01 1.7127
RR_LCI 1.5366 1.395 1.498 1.641 5.65e-02 1.4543
RR_UCI 2.1071 1.934 2.063 6.552 1.23e+04 2.0169
SEN 0.3311 0.452 0.418 0.977 1.00e+00 0.2742
SPE 0.8734 0.757 0.809 0.111 1.55e-02 0.8915
BACC 0.6022 0.604 0.613 0.544 5.08e-01 0.5829
NetBenefit 0.0839 0.110 0.105 0.282 3.17e-01 0.0586
pander::pander(t(rrCoxTestAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.44 1.28 1.62 2.12e-09
pander::pander(rrCoxTestAnalysis$c.index,caption="C. Index")
  • C Index: 0.664

  • Dxy: 0.328

  • S.D.: 0.0311

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 176737

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.664 0.664 0.635 0.695
pander::pander(t(rrCoxTestAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.66 0.619 0.7
pander::pander((rrCoxTestAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.328 0.275 0.384
pander::pander((rrCoxTestAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.873 0.836 0.905
pander::pander(t(rrCoxTestAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.459 0.389
pander::pander(rrCoxTestAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 81.471750 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 457 164 221.4 14.888 58.181
class=1 82 37 33.2 0.438 0.494
class=2 147 98 44.4 64.710 77.254

Calibrating the index on the test data

# Re-calibrate the Cox model directly on the external GBSG cohort.
# NOTE: this overwrites calprob from the training-set calibration above.
calprob <- CoxRiskCalibration(ml,dataBrestCancerTest,"status","time")

( 5.71335 , 2807.851 , 1.307008 , 299 , 327.7601 )

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.604 1.04 5.71
# Evaluate the calibrated test probabilities with thresholds re-derived at the
# 0.90/0.80 rates on the test set itself.
rdata <- cbind(dataBrestCancerTest$status,calprob$prob)

rrAnalysis <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTest$time,
                     title="Cal. Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

After Calibration Report

pander::pander(t(rrAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6284 0.5247 0.5310 0.299 2.39e-01 0.5002
RR 1.7405 1.7325 1.7581 3.279 2.64e+01 1.6427
RR_LCI 1.4790 1.4751 1.4980 1.641 5.65e-02 1.3951
RR_UCI 2.0483 2.0347 2.0632 6.552 1.23e+04 1.9343
SEN 0.2676 0.4247 0.4181 0.977 1.00e+00 0.4515
SPE 0.8992 0.7984 0.8088 0.111 1.55e-02 0.7571
BACC 0.5834 0.6116 0.6134 0.544 5.08e-01 0.6043
NetBenefit 0.0205 0.0596 0.0601 0.212 2.61e-01 0.0597
pander::pander(t(rrAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.18 1.05 1.32 0.00467
pander::pander(rrAnalysis$c.index,caption="C. Index")
  • C Index: 0.664

  • Dxy: 0.328

  • S.D.: 0.0311

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 176736

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.664 0.664 0.63 0.694
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.66 0.619 0.7
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.264 0.215 0.318
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.865 0.927
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.629 0.525
pander::pander(rrAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 80.835092 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 482 173 232.4 15.20 69.5
class=1 86 47 32.0 7.02 7.9
class=2 118 79 34.6 57.14 65.4

Logistic Model

Here we train a logistic model on the same data set

## Only label subjects that present the event within five years

# Keep subjects whose 5-year status is known: followed at least 5 years, or
# had an event. Censored subjects with < 5 years of follow-up are dropped.
dataBrestCancerR <- subset(dataBrestCancerTrain, time>=5 | status==1)
# Events occurring after 5 years count as non-events for the 5-year outcome.
dataBrestCancerR$status <- dataBrestCancerR$status * (dataBrestCancerR$time < 5)
dataBrestCancerR$time <- NULL

#ml <- BSWiMS.model(status~1,data=dataBrestCancerR,loops=20,NumberofRepeats = 5)
# Logistic BSWiMS model of the 5-year event indicator (1 loop, 5 repeats).
mlog <- BSWiMS.model(status~1,data=dataBrestCancerR,loops=1,NumberofRepeats = 5)

—–..

# Coefficient summary: odds-ratio CIs plus accuracy/AUC/IDI/NRI statistics.
sm <- summary(mlog)
pander::pander(sm$coefficients)
  Estimate lower OR upper u.Accuracy r.Accuracy full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI z.NRI Delta.AUC Frequency
size_nodes 1.05e-03 1.001 1.001 1.001 0.669 0.571 0.668 0.627 0.500 0.628 0.11233 0.63654 17.86 18.870 0.128490 1
nodes 4.33e-02 1.040 1.044 1.048 0.676 0.634 0.690 0.639 0.621 0.662 0.07110 0.57106 14.13 16.179 0.040494 1
grade_nodes 1.50e-02 1.014 1.015 1.016 0.682 0.637 0.686 0.649 0.624 0.655 0.06580 0.54866 13.66 15.650 0.031087 1
age_nodes 1.06e-03 1.001 1.001 1.001 0.678 0.653 0.686 0.642 0.621 0.657 0.03346 0.21312 9.39 5.710 0.035896 1
size_grade 1.75e-03 1.001 1.002 1.002 0.632 0.682 0.686 0.626 0.646 0.655 0.01787 0.29411 6.74 7.728 0.008648 1
age_size 8.73e-05 1.000 1.000 1.000 0.608 0.682 0.686 0.577 0.649 0.657 0.01534 0.29152 6.41 7.652 0.007600 1
grade 2.27e-01 1.168 1.254 1.347 0.571 0.683 0.690 0.500 0.653 0.662 0.01340 0.19036 6.20 4.983 0.008461 1
age_meno -6.04e-03 0.992 0.994 0.996 0.571 0.676 0.686 0.500 0.645 0.657 0.00782 0.08057 4.76 2.337 0.012065 1
age_pgr -5.42e-06 1.000 1.000 1.000 0.571 0.686 0.686 0.500 0.656 0.657 0.00512 0.00745 4.11 0.194 0.000417 1
age_grade -1.65e-03 0.997 0.998 0.999 0.574 0.690 0.690 0.507 0.661 0.662 0.00454 0.11372 3.60 2.960 0.000315 1
meno_grade 1.02e-01 1.045 1.107 1.173 0.571 0.683 0.686 0.500 0.652 0.657 0.00425 0.20428 3.47 5.343 0.004441 1
nodes_hormon -1.38e-02 0.979 0.986 0.994 0.587 0.688 0.686 0.526 0.658 0.655 0.00280 0.45522 3.44 12.150 -0.002853 1
size 3.94e-03 1.002 1.004 1.006 0.611 0.693 0.690 0.618 0.663 0.662 0.00507 0.21050 3.42 5.600 -0.001075 1
meno_pgr 3.19e-04 1.000 1.000 1.001 0.571 0.687 0.686 0.500 0.657 0.657 0.00316 0.05977 3.35 1.558 -0.000429 1
pgr -1.07e-04 1.000 1.000 1.000 0.571 0.689 0.686 0.500 0.659 0.655 0.00257 0.19759 2.64 5.745 -0.004123 1
meno_nodes -2.60e-02 0.955 0.974 0.994 0.640 0.686 0.686 0.595 0.656 0.657 0.00264 -0.06329 2.59 -1.645 0.000631 1
grade_pgr -3.51e-05 1.000 1.000 1.000 0.571 0.669 0.668 0.500 0.627 0.628 0.00241 0.17471 2.55 5.058 0.001252 1
meno_size 2.34e-03 1.000 1.002 1.004 0.604 0.691 0.690 0.578 0.663 0.662 0.00185 0.10227 2.43 2.662 -0.001378 1

Logistic Model Performance

# Reset the saved graphics state before the logistic evaluation plots.
op <- par(no.readonly = TRUE)


# Raw logistic probabilities evaluated as a risk index on the FULL training set
# (including subjects that were excluded when building the 5-year outcome).
cprob <- predict(mlog,dataBrestCancerTrain)

rdata <- cbind(dataBrestCancerTrain$status,cprob)
rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Logistic Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=5.0)

par(op)

Training Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.542 0.431 0.394 0.255 0.130969 0.500
RR 1.765 1.739 1.799 2.213 1.000000 1.773
RR_LCI 1.659 1.627 1.676 1.764 0.000000 1.665
RR_UCI 1.879 1.858 1.931 2.777 0.000000 1.888
SEN 0.327 0.470 0.566 0.962 1.000000 0.374
SPE 0.900 0.799 0.731 0.125 0.000683 0.874
BACC 0.613 0.635 0.648 0.543 0.500342 0.624
NetBenefit 0.108 0.165 0.202 0.342 0.435125 0.129
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.817 0.776 0.859 3.78e-16
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
  • C Index: 0.68

  • Dxy: 0.36

  • S.D.: 0.014

  • n: 2982

  • missing: 0

  • uncensored: 1518

  • Relevant Pairs: 6184528

  • Concordant: 4206586

  • Uncertain: 2703838

  • cstatCI:

    mean.C Index median lower upper
    0.68 0.68 0.666 0.694
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.695 0.677 0.714
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.327 0.303 0.351
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.541 0.431
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 541.976716 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1974 804 1144 100.9 415.3
class=1 365 218 170 13.4 15.1
class=2 643 496 204 418.2 490.7

Results on the validation set using Logistic model

# Logistic predictions on GBSG, thresholded at the probability cut points
# learned on the training set (thr_atP).
pre <- predict(mlog,dataBrestCancerTest)
rdata <- cbind(dataBrestCancerTest$status,pre)

rrAnalysis <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Logistic Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=5)

par(op)

Validation Report

pander::pander(t(rrAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.541 @:0.431 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.542 0.431 0.439 0.306 2.31e-01 0.4996
RR 1.792 1.702 1.756 2.678 2.20e+01 1.7318
RR_LCI 1.529 1.428 1.477 1.679 4.75e-02 1.4731
RR_UCI 2.100 2.029 2.088 4.271 1.02e+04 2.0360
SEN 0.395 0.595 0.579 0.950 1.00e+00 0.4482
SPE 0.832 0.638 0.669 0.181 1.29e-02 0.7804
BACC 0.613 0.617 0.624 0.565 5.06e-01 0.6143
NetBenefit 0.060 0.105 0.106 0.210 2.69e-01 0.0717
pander::pander(t(rrAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.03 0.921 1.16 0.556
pander::pander(rrAnalysis$c.index,caption="C. Index")
  • C Index: 0.669

  • Dxy: 0.338

  • S.D.: 0.0309

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 178115

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.669 0.669 0.639 0.699
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.669 0.628 0.709
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.395 0.339 0.453
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.832 0.791 0.868
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.541 0.431
pander::pander(rrAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 92.507991 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 369 121 181.7 20.2997 52.3868
class=1 134 60 61.7 0.0479 0.0604
class=2 183 118 55.5 70.2342 88.0195

Logistic Model Poisson Calibration

# Poisson-risk calibration of the logistic probabilities: the input is a
# matrix of (status, predicted probability, time-to-event) columns.
riskdata <- cbind(dataBrestCancerTrain$status,predict(mlog,dataBrestCancerTrain,type="prob"),dataBrestCancerTrain$time)
calprob <- CalibrationProbPoissonRisk(riskdata)

( 7.438575 , 26976.42 , 1.110714 , 1518 , 1838.456 )

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Logistic Calibration Parameters")
h0 Gain DeltaTime
0.689 1.33 7.44
# Keep the calibrated interval and hazard gain; the gain is applied to the
# test-set probabilities further below via adjustProb().
timeinterval <- calprob$timeInterval;
gain <- calprob$hazardGain

rdata <- cbind(dataBrestCancerTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Cal. Logistic Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

Report of the calibrated logistic: training

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6463 0.528 0.486 0.324 0.170289 0.500
RR 1.7654 1.739 1.799 2.213 1.000000 1.764
RR_LCI 1.6587 1.627 1.676 1.764 0.000000 1.648
RR_UCI 1.8790 1.858 1.931 2.777 0.000000 1.889
SEN 0.3267 0.470 0.566 0.962 1.000000 0.519
SPE 0.8996 0.799 0.731 0.125 0.000683 0.765
BACC 0.6132 0.635 0.648 0.543 0.500342 0.642
NetBenefit 0.0763 0.129 0.163 0.284 0.408374 0.149
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.914 0.868 0.961 0.000374
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
  • C Index: 0.68

  • Dxy: 0.36

  • S.D.: 0.014

  • n: 2982

  • missing: 0

  • uncensored: 1518

  • Relevant Pairs: 6184528

  • Concordant: 4206585

  • Uncertain: 2703838

  • cstatCI:

    mean.C Index median lower upper
    0.68 0.68 0.666 0.693
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.695 0.677 0.714
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.327 0.303 0.351
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.645 0.528
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 541.976716 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1974 804 1144 100.9 415.3
class=1 365 218 170 13.4 15.1
class=2 643 496 204 418.2 490.7
# Adjust the raw logistic test probabilities with the training-set hazard gain,
# then evaluate at the training-derived probability thresholds.
probLog <- predict(mlog,dataBrestCancerTest)
aprob <- adjustProb(probLog,gain)

rdata <- cbind(dataBrestCancerTest$status,aprob)
rrAnalysisTestLogistic <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Cal. Logistic Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

Report of the calibrated validation

pander::pander(t(rrAnalysisTestLogistic$keyPoints),caption="Threshold values")
Threshold values
  @:0.645 @:0.528 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.645672 0.5277 0.5360 0.385 2.94e-01 0.5001
RR 1.791927 1.7024 1.7562 2.678 2.20e+01 1.7232
RR_LCI 1.529135 1.4283 1.4771 1.679 4.75e-02 1.4313
RR_UCI 2.099881 2.0290 2.0880 4.271 1.02e+04 2.0746
SEN 0.394649 0.5953 0.5786 0.950 1.00e+00 0.6555
SPE 0.832041 0.6382 0.6693 0.181 1.29e-02 0.5762
BACC 0.613345 0.6168 0.6239 0.565 5.06e-01 0.6159
NetBenefit -0.000601 0.0315 0.0367 0.125 2.04e-01 0.0466
pander::pander(t(rrAnalysisTestLogistic$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.16 1.03 1.3 0.0128
pander::pander(rrAnalysisTestLogistic$c.index,caption="C. Index")
  • C Index: 0.669

  • Dxy: 0.338

  • S.D.: 0.0309

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 178115

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.669 0.67 0.64 0.702
pander::pander(t(rrAnalysisTestLogistic$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.669 0.628 0.709
pander::pander((rrAnalysisTestLogistic$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.395 0.339 0.453
pander::pander((rrAnalysisTestLogistic$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.832 0.791 0.868
pander::pander(t(rrAnalysisTestLogistic$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.645 0.528
pander::pander(rrAnalysisTestLogistic$surdif,caption="Logrank test")
Logrank test Chisq = 92.507991 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 369 121 181.7 20.2997 52.3868
class=1 134 60 61.7 0.0479 0.0604
class=2 183 118 55.5 70.2342 88.0195

Comparing the COX and Logistic Models on the Independent Data

# Accumulated and mean observed/expected ratios for both models on GBSG;
# values near 1 indicate good calibration.
pander::pander(t(rrCoxTestAnalysis$OAcum95ci))
mean 50% 2.5% 97.5%
1.24 1.24 1.24 1.25
pander::pander(t(rrAnalysisTestLogistic$OAcum95ci))
mean 50% 2.5% 97.5%
0.943 0.943 0.94 0.947
pander::pander(t(rrCoxTestAnalysis$OE95ci))
mean 50% 2.5% 97.5%
1.22 1.22 1.19 1.25
pander::pander(t(rrAnalysisTestLogistic$OE95ci))
mean 50% 2.5% 97.5%
0.981 0.982 0.958 1
# Total observed events in the test set: upper bound for the calibration axes.
maxobs <- sum(dataBrestCancerTest$status)

par(mfrow=c(1,2),cex=0.75)

# Cumulative predicted probability vs observed events; the gray identity
# line is perfect agreement.
plot(rrCoxTestAnalysis$CumulativeOvs[,1:2],type="l",lty=1,
     main="Cumulative Probability",
     xlab="Observed",
     ylab="Cumulative Probability",
     ylim=c(0,maxobs),
     xlim=c(0,maxobs))
lines(rrAnalysisTestLogistic$CumulativeOvs[,1:2],lty=2,col="red")
lines(x=c(0,maxobs),y=c(0,maxobs),lty=3,col="gray")
legend("topleft",legend = c("Cox","Logistic","Ideal"),
       col=c("black","red","gray"),
       lty=c(1,2,3),
       cex=0.75
)


# Difference between cumulative predicted risk and observed events.
plot(rrCoxTestAnalysis$CumulativeOvs$Observed,
     rrCoxTestAnalysis$CumulativeOvs$Cumulative-
       rrCoxTestAnalysis$CumulativeOvs$Observed,
     main="Cumulative Risk Difference",
     xlab="Observed",
     ylab="Cumulative Risk - Observed",
     type="l",
     lty=1)
lines(rrAnalysisTestLogistic$CumulativeOvs$Observed,
     rrAnalysisTestLogistic$CumulativeOvs$Cumulative-
       rrAnalysisTestLogistic$CumulativeOvs$Observed,
     lty=2,
     col="red")
legend("topleft",legend = c("Cox","Logistic"),
       col=c("black","red"),
       lty=c(1,2),
       cex=0.75
)

# Expected vs observed events over time for both models.
plot(rrCoxTestAnalysis$OEData[,2:3],type="l",lty=1,
     main="Expected over Time",
     xlab="Observed",
     ylab="Expected",
     ylim=c(0,maxobs),
     xlim=c(0,maxobs))
lines(rrAnalysisTestLogistic$OEData[,2:3],lty=2,col="red")
lines(x=c(0,maxobs),y=c(0,maxobs),lty=3,col="gray")
legend("topleft",legend = c("Cox","Logistic","Ideal"),
       col=c("black","red","gray"),
       lty=c(1,2,3),
       cex=0.75
)

# Expected-minus-observed difference over time.
plot(rrCoxTestAnalysis$OEData$Observed,
     rrCoxTestAnalysis$OEData$Expected-
       rrCoxTestAnalysis$OEData$Observed,
     main="Expected vs Observed Difference",
     xlab="Observed",
     ylab="Cumulative - Observed",
     type="l",
     lty=1)
lines(rrAnalysisTestLogistic$OEData$Observed,
     rrAnalysisTestLogistic$OEData$Expected-
       rrAnalysisTestLogistic$OEData$Observed,
     lty=2,col="red")

legend("bottomleft",legend = c("Cox","Logistic"),
       col=c("black","red"),
       lty=c(1,2),
       cex=0.75
)

# Restore the saved graphics parameters.
par(op)